/*-
 * Copyright (c) 2009 Guillaume Ballet
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/param.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <sys/pcpu.h>
#include <sys/cons.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <machine/pmap.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <machine/pcb.h>
#include <machine/machdep.h>
#include <machine/undefined.h>
#include <sys/kdb.h>
#define DEBUG_INITARM
#include "beagle.h"

/*
 * Per-CPU data structure defined by this file, and the pointer through
 * which it is accessed. initarm() hands pcpup to pcpu_init() and later
 * stores the current PCB in pcpup->pc_curpcb.
 */
struct pcpu	__pcpu;
struct pcpu	*pcpup = &__pcpu;

/*
 * Physical page ranges, filled in by initarm(). Entries [0] and [1] hold
 * the start and end of the single range in use; entries [2] and [3] are
 * set to zero.
 */
vm_paddr_t	phys_avail[4];
vm_paddr_t	dump_avail[4];

/*
 * Descriptor of the system page holding the exception vectors. initarm()
 * allocates one page for it and sets its virtual address to
 * ARM_VECTORS_HIGH.
 */
struct pv_addr systempage;

/* Size, in pages, of the stack reserved by initarm() for each exception mode. */
#define FIQ_STACK_SIZE	1
#define IRQ_STACK_SIZE	1
#define ABT_STACK_SIZE	1
#define UND_STACK_SIZE	1

/*
 * Number of L2 page table descriptors set up by initarm(). Descriptor #0 is
 * used for the exception vector page, the others for the kernel range.
 */
#define NUM_KERNEL_PTS	12

/*
 * Symbols delimiting the kernel image; initarm() uses _end to size the kernel
 * mapping. Presumably provided by the linker script -- TODO confirm.
 */
extern int _start[];
extern int _end[];

/* Defined outside this file; presumably the board's raw console output routine -- TODO confirm. */
extern void beagle_putchar(unsigned char);

/* Handler address slots defined elsewhere; initarm() stores the abort and undefined-instruction handlers here. */
extern u_int data_abort_handler_address;
extern u_int prefetch_abort_handler_address;
extern u_int undefined_handler_address;

/*
 * Physical/virtual address descriptors for the memory reserved by initarm().
 */
/* struct pv_addr	msgbuf */
static struct pv_addr	fiqstack;				/* Stack page descriptors for all modes */
static struct pv_addr	irqstack;
static struct pv_addr	undstack;
static struct pv_addr	abtstack;
static struct pv_addr	kernelstack;
static struct pv_addr	kernel_l1pt;				/* Descriptor of the kernel level-1 page table */
static struct pv_addr	kernel_page_tables[NUM_KERNEL_PTS];	/* Descriptors of the level-2 page tables for the kernel */

/* Trap frame that initarm() assigns to thread0.td_frame. */
static struct trapframe	proc0_tf;

/*
 * Translate between kernel physical and kernel virtual addresses using the
 * constant offset between KERNPHYSADDR and KERNVIRTADDR.
 *
 * The argument is parenthesized so that compound expressions (for example
 * "base | offset") are translated as a whole instead of being split by
 * operator precedence inside the expansion.
 */
#define PHYS2VIRT(x)	(((x) - KERNPHYSADDR) + KERNVIRTADDR)
#define VIRT2PHYS(x)	(((x) - KERNVIRTADDR) + KERNPHYSADDR)

/*
 * Verbose boot tracing switch. It has to be defined before the allocation
 * macros below, because the choice of valloc_trace() is made by the
 * preprocessor at the point of definition.
 */
#define VERBOSE_INIT_ARM

/*
 * valloc_trace(var): print the virtual and physical address stored in the
 * pv_addr descriptor 'var' when VERBOSE_INIT_ARM is set, otherwise do nothing.
 *
 * Preprocessor conditionals cannot appear inside a macro body, so the
 * conditional selects between two complete definitions instead.
 */
#ifdef VERBOSE_INIT_ARM
#define valloc_trace(var)						\
	printf("va=%p pa=%p\n", (void*)(var).pv_va, (void*)(var).pv_pa)
#else
#define valloc_trace(var)	((void)0)
#endif

/*
 * alloc_pages(var, np): take 'np' pages from the bump pointer 'freemempos'
 * (which must be in scope at the point of use), store the start address in
 * 'var' and zero the pages. Wrapped in do { } while (0) so that it behaves
 * as a single statement; 'np' is parenthesized everywhere it is used.
 */
#define alloc_pages(var, np)	do {					\
	(var) = freemempos;						\
	freemempos += ((np) * PAGE_SIZE);				\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));			\
} while (0)

/*
 * Macro stolen from the Xscale part, used to simplify TLB allocation.
 *
 * valloc_pages(var, np): allocate 'np' zeroed pages with alloc_pages(),
 * record their address in (var).pv_pa and the PHYS2VIRT() translation of
 * that address in (var).pv_va.
 */
#define valloc_pages(var, np)	do {					\
	alloc_pages((var).pv_pa, (np));					\
	(var).pv_va = PHYS2VIRT((var).pv_pa);				\
	valloc_trace(var);						\
} while (0)

/* Round x up to the next multiple of (L2_L_OFFSET + 1), using the L2_L_FRAME mask. */
#define round_L_page(x) (((x) + L2_L_OFFSET) & L2_L_FRAME)

/*
 * Static device windows handed to pmap_devmap_bootstrap() by initarm().
 * For the moment, devices are mapped with PA==VA, so the two leading
 * addresses of every entry are identical.
 */
static const struct pmap_devmap omap3_devmap[] = {
	/* 1MB of L4, covering the MMU registers */
	{ 0x48000000, 0x48000000, 0x00100000,
	  VM_PROT_READ | VM_PROT_WRITE, PTE_NOCACHE },

	/* 1MB of L4, covering the console */
	{ 0x49000000, 0x49000000, 0x00100000,
	  VM_PROT_READ | VM_PROT_WRITE, PTE_NOCACHE },

	/* Array terminator */
	{ 0, 0, 0, 0, 0 }
};

void *initarm(void *arg1, void *arg2)
{

	vm_offset_t	freemempos;
	int		i, j;
	volatile int	cpsr;
#ifdef VERBOSE_INIT_ARM
	long		*reg;
#endif

	pcpu_init(pcpup, 0, sizeof(struct pcpu));
	PCPU_SET(curthread, &thread0);

	set_cpufuncs();

	cpu_initclocks();

	/* Console subsystem */
	cninit();
#ifdef VERBOSE_INIT_ARM
	printf("==== MMU registers ====\n");
	for (reg=(long*)0x480bd400; reg<(long*)0x480bd46c; reg++)
		printf("%p:\t%#x\n", reg, (unsigned int)*reg);

	__asm __volatile("mrc	p15, 0, %0, c1, c0, 0 ;" : "=r" (cpsr));
	printf("cp15:c1:c0=%#x\n", cpsr);

	printf("PMAP_DOMAIN_KERNEL=%#x\n", PMAP_DOMAIN_KERNEL);
#endif

	/* ** Page reservations ************************************************************************* */

	/* Initialize freemempos. Pages are allocated from the end of the RAM's first 64MB, as it is what */
	/* is covered by the default TLB in locore.S.							  */
	freemempos	= VIRT2PHYS(round_L_page( /*(long)_end)) */ fake_preload_metadata()));
	printf("freemempos=%p %d %d\n", (void*)freemempos, L1_S_SIZE, L1_TABLE_SIZE);

	/* Reserve L1 table pages now, as freemempos is 64K-aligned */
	valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE);
	printf("l1pa=%p l1va=%p\n", (void*)kernel_l1pt.pv_pa, (void*)kernel_l1pt.pv_va);

	/* Reserve the paging system pages, page #0 is reserved as a L2 table for the exception vector. */
	for (i=0; i<NUM_KERNEL_PTS; i++) {
		if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) {
			valloc_pages(kernel_page_tables[i], L2_TABLE_SIZE / PAGE_SIZE);
		} else {
			j				= i % (PAGE_SIZE / L2_TABLE_SIZE_REAL);
			kernel_page_tables[i].pv_pa	= kernel_page_tables[i - j].pv_pa + (j * L2_TABLE_SIZE_REAL);
			kernel_page_tables[i].pv_va	= kernel_page_tables[i - j].pv_va + (j * L2_TABLE_SIZE_REAL);
		}
	}

	/* Handle system page, where the interrupt vector is stored. */
	valloc_pages(systempage, 1);
	systempage.pv_va	= ARM_VECTORS_HIGH;

	/* Stack pages */
	valloc_pages(fiqstack, FIQ_STACK_SIZE);
	valloc_pages(irqstack, IRQ_STACK_SIZE);
	valloc_pages(abtstack, ABT_STACK_SIZE);
	valloc_pages(undstack, UND_STACK_SIZE);
	valloc_pages(kernelstack, KSTACK_PAGES + 1);

	/* ** Build the TLBs **************************************************************************** */

	/* L2 table for the exception vector */
	pmap_link_l2pt(kernel_l1pt.pv_va, ARM_VECTORS_HIGH & ~(0x100000 - 1), &kernel_page_tables[0]);

	/* Insert a reference to the kernel L2 page tables into the L1 page. */
	for (i=1; i<NUM_KERNEL_PTS; i++) {
		pmap_link_l2pt(kernel_l1pt.pv_va,
				KERNVIRTADDR + (i-1) * 0x100000,
				&kernel_page_tables[i]);
	}

	/* Map the kernel */
	size_t	textsize = round_L_page((long)etext - KERNVIRTADDR);
	size_t	totalsize = round_L_page((long) _end - KERNVIRTADDR);

#ifdef VERBOSE_INIT_ARM
	printf(".text=%#x total=%#x _end=%#x %#x %#x\n", textsize, totalsize, (unsigned int)_end, (unsigned int) etext, KERNVIRTADDR);
#endif

	vm_offset_t	offset = 0;
	offset += pmap_map_chunk(kernel_l1pt.pv_va, KERNVIRTADDR + offset,
			KERNPHYSADDR + offset, textsize,
			VM_PROT_READ|VM_PROT_EXECUTE, PTE_CACHE);

	offset += pmap_map_chunk(kernel_l1pt.pv_va, KERNVIRTADDR + offset,
			KERNPHYSADDR + offset, totalsize - textsize,
			VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);

	/* Map the L1 page table */
	pmap_map_chunk(kernel_l1pt.pv_va,
			kernel_l1pt.pv_va, kernel_l1pt.pv_pa,
			L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE,
			PTE_PAGETABLE);

	/* Map the L2 page tables */
	for (i=0; i<NUM_KERNEL_PTS; i++) {
		pmap_map_chunk(kernel_l1pt.pv_va,
			kernel_page_tables[i].pv_va, kernel_page_tables[i].pv_pa,
			L2_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE,
			PTE_PAGETABLE);
	}

	/* Map the interrupt vector */
	pmap_map_entry(kernel_l1pt.pv_va, ARM_VECTORS_HIGH, systempage.pv_pa,
			VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);

	/* Map the stack pages */
#ifdef VERBOSE_INIT_ARM
	printf("Mapping the stack pages\n");
#endif
	pmap_map_chunk(kernel_l1pt.pv_pa, fiqstack.pv_va, fiqstack.pv_pa,
			FIQ_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
	pmap_map_chunk(kernel_l1pt.pv_pa, irqstack.pv_va, irqstack.pv_pa,
			IRQ_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
	pmap_map_chunk(kernel_l1pt.pv_pa, abtstack.pv_va, abtstack.pv_pa,
			ABT_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
	pmap_map_chunk(kernel_l1pt.pv_pa, undstack.pv_va, undstack.pv_pa,
			UND_STACK_SIZE * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);
	pmap_map_chunk(kernel_l1pt.pv_pa, kernelstack.pv_va, kernelstack.pv_pa,
			(KSTACK_PAGES+1) * PAGE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE);

	/* Device map */
	pmap_devmap_bootstrap(kernel_l1pt.pv_pa, omap3_devmap);
	//dump_table((void*)kernel_l1pt.pv_va, 0, 16384, "initial");

	/* ** Switch L1 TLB table *********************************************************************** */
#ifdef VERBOSE_INIT_ARM
	printf("DOMAIN_CLIENT=%#x, PMAP_DOMAIN_KERNEL=%#x\n", DOMAIN_CLIENT, PMAP_DOMAIN_KERNEL);
#endif
	cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_MANAGER);
	setttb(kernel_l1pt.pv_pa);
	cpu_tlb_flushID();
	//cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)));
	/* Do not set the domain for now. The beagle board seems to have problems with the backward-compatibility mode */

#ifdef VERBOSE_INIT_ARM
	printf("Now using the new L1.\n");
#endif

	/* ** Set stack frames ************************************************************************** */
	set_stackptr(PSR_FIQ32_MODE, fiqstack.pv_va + FIQ_STACK_SIZE * PAGE_SIZE);
	set_stackptr(PSR_IRQ32_MODE, irqstack.pv_va + IRQ_STACK_SIZE * PAGE_SIZE);
	set_stackptr(PSR_ABT32_MODE, abtstack.pv_va + ABT_STACK_SIZE * PAGE_SIZE);
	set_stackptr(PSR_UND32_MODE, undstack.pv_va + UND_STACK_SIZE * PAGE_SIZE);

#ifdef VERBOSE_INIT_ARM
	printf("STACK: %p\n", (void *)(fiqstack.pv_va + FIQ_STACK_SIZE * PAGE_SIZE));
	printf("STACK: %p\n", (void*)(irqstack.pv_va + IRQ_STACK_SIZE * PAGE_SIZE));
	printf("STACK: %p\n", (void *)(abtstack.pv_va + ABT_STACK_SIZE * PAGE_SIZE));
	printf("STACK: %p\n", (void *)(undstack.pv_va + UND_STACK_SIZE * PAGE_SIZE));
#endif

	/* ** Miscellaneous ***************************************************************************** */

	/* Exception handlers */
	data_abort_handler_address	= (u_int) data_abort_handler;
	prefetch_abort_handler_address	= (u_int) prefetch_abort_handler;
	undefined_handler_address	= (u_int) undefinedinstruction_bounce;
	undefined_init();

	/* Prepares the context of the first process */
	proc_linkup(&proc0, &thread0);
	thread0.td_kstack		= kernelstack.pv_va;
	thread0.td_pcb			= (struct pcb *) (thread0.td_kstack + (KSTACK_PAGES + 1) * PAGE_SIZE) - 1;
	thread0.td_pcb->pcb_flags	= 0;
	thread0.td_frame		= &proc0_tf;
	pcpup->pc_curpcb		= thread0.td_pcb;

	/* Exception vector */
#ifdef VERBOSE_INIT_ARM 
	printf("Exception vector at (%#x)\n", ((unsigned int *)systempage.pv_va)[0]);
#endif
	arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL);

	/* First unbacked address of KVM */
	pmap_curmaxkvaddr	= KERNVIRTADDR + 0x100000 * NUM_KERNEL_PTS;

	/* Physical ranges of available memory. */
	phys_avail[0]	= freemempos;
	phys_avail[1]	= PHYSADDR + BEAGLE_MEMSIZE;
	phys_avail[2]	= 0;
	phys_avail[3]	= 0;

	dump_avail[0]	= PHYSADDR;
	dump_avail[1]	= PHYSADDR + BEAGLE_MEMSIZE;
	dump_avail[2]	= 0;
	dump_avail[3]	= 0;

	physmem		= BEAGLE_MEMSIZE / PAGE_SIZE;

	init_param1();
	init_param2(physmem);

	pmap_bootstrap((freemempos&0x007fffff)|0xc0000000, KERNVIRTADDR+0x10000000, &kernel_l1pt);

	/* Locking system */
	mutex_init();

	/* Kernel debugger */
	kdb_init();

	/* initarm returns the address of the kernel stack */
	return (void *)(kernelstack.pv_va + (KSTACK_PAGES + 1) * PAGE_SIZE);
}

